(**************************************************************************)
(*                                                                        *)
(*                                 OCaml                                  *)
(*                                                                        *)
(*               Jeremy Yallop, University of Cambridge                   *)
(*               Gabriel Scherer, Project Parsifal, INRIA Saclay          *)
(*               Alban Reynaud, ENS Lyon                                  *)
(*                                                                        *)
(*   Copyright 2017 Jeremy Yallop                                         *)
(*   Copyright 2018 Alban Reynaud                                         *)
(*   Copyright 2018 INRIA                                                 *)
(*                                                                        *)
(*   All rights reserved.  This file is distributed under the terms of    *)
(*   the GNU Lesser General Public License version 2.1, with the          *)
(*   special exception on linking described in the file LICENSE.          *)
(*                                                                        *)
(**************************************************************************)

(** Static checking of recursive declarations, as described in

      A practical mode system for recursive definitions
      Alban Reynaud, Gabriel Scherer and Jeremy Yallop
      POPL 2021

Some recursive definitions are meaningful
{[
  let rec factorial = function 0 -> 1 | n -> n * factorial (n - 1)
  let rec infinite_list = 0 :: infinite_list
]}
but some others are meaningless
{[
  let rec x = x
  let rec x = x+1
]}

Intuitively, a recursive definition makes sense when the body of the
definition can be evaluated without fully knowing what the recursive
name is yet.

In the [factorial] example, the name [factorial] refers to a function,
evaluating the function definition [function ...] can be done
immediately and will not force a recursive call to [factorial] -- this
will only happen later, when [factorial] is called with an argument.

In the [infinite_list] example, we can evaluate [0 :: infinite_list]
without knowing the full content of [infinite_list], but with just its
address. This is a case of productive/guarded recursion.

On the contrary, [let rec x = x] is unguarded recursion (the meaning
is undetermined), and [let rec x = x+1] would need the value of [x]
while evaluating its definition [x+1].

This file implements a static check to decide which definitions are
known to be meaningful, and which may be meaningless. In the general
case, we handle a set of mutually-recursive definitions
{[
let rec x1 = e1
and x2 = e2
...
and xn = en
]}


Our check (see function [is_valid_recursive_expression]) is defined
using two criteria:

Usage of recursive variables: how does each of the [e1 .. en] use the
 recursive variables [x1 .. xn]?

Static or dynamic size: for which of the [ei] can we compute the
  in-memory size of the value without evaluating [ei] (so that we can
  pre-allocate it, and thus know its final address before evaluation).

The "static or dynamic size" is decided by the classify_* functions below.

The "variable usage" question is decided by a static analysis looking
very much like a type system. The idea is to assign "access modes" to
variables, where an "access mode" [m] is defined as either

    m ::= Ignore (* the value is not used at all *)
        | Delay (* the value is not needed at definition time *)
        | Guard (* the value is stored under a data constructor *)
        | Return (* the value result is directly returned *)
        | Dereference (* full access and inspection of the value *)

The access modes of an expression [e] are represented by a "context"
[G], which is simply a mapping from variables (the variables used in
[e]) to access modes.

The core notion of the static check is a type-system-like judgment of
the form [G |- e : m], which can be interpreted as meaning either of:

- If we are allowed to use the variables of [e] at the modes in [G]
  (but not more), then it is safe to use [e] at the mode [m].

- If we want to use [e] at the mode [m], then its variables are
  used at the modes in [G].

In practice, for a given expression [e], our implementation takes the
desired mode of use [m] as *input*, and returns a context [G] as
*output*, which is (uniquely determined as) the most permissive choice
of modes [G] for the variables of [e] such that [G |- e : m] holds.
*)

open Asttypes
open Typedtree
open Types

(** {1 Static or dynamic size} *)

(* Size classification of an expression, as computed by the classify_*
   functions below, which answer with [Static] or [Dynamic]. *)
type sd = Value_rec_types.recursive_binding_kind

(* [is_ref vd] holds exactly when [vd] describes the primitive named
   [%makemutable] with arity 1. *)
let is_ref : Types.value_description -> bool =
  fun vd ->
    match vd.Types.val_kind with
    | Types.Val_prim { Primitive.prim_name = "%makemutable";
                       prim_arity = 1 } ->
        true
    | _ ->
        false

(* [is_abstracted_arg (label, arg)] tells whether [arg] was omitted from
   the application. See the note on abstracted arguments in the
   documentation for Typedtree.Texp_apply. *)
let is_abstracted_arg : arg_label * apply_arg -> bool =
  fun (_label, arg) ->
    match arg with
    | Omitted () -> true
    | Arg _ -> false

(* [classify_expression e] predicts whether the size of the value of [e]
   is known without evaluating it ([Static]) or not ([Dynamic]).

   The traversal carries an environment mapping locally let-bound
   identifiers to their own classification; it starts from the empty
   environment, so identifiers bound outside of [e] are not in it. *)
let classify_expression : Typedtree.expression -> sd =
  (* We need to keep track of the size of expressions
      bound by local declarations, to be able to predict
      the size of variables. Compare:

        let rec r =
          let y = fun () -> r ()
          in y

      and

        let rec r =
          let y = if Random.bool () then ignore else fun () -> r ()
          in y

    In both cases the final address of `r` must be known before `y` is compiled,
    and this is only possible if `r` has a statically-known size.

    The first definition can be allowed (`y` has a statically-known
    size) but the second one is unsound (`y` has no statically-known size).
  *)
  let rec classify_expression env e : sd =
    match e.exp_desc with
    (* binding and variable cases *)
    | Texp_let (rec_flag, vb, e) ->
        let env = classify_value_bindings rec_flag env vb in
        classify_expression env e
    | Texp_ident (path, _, _) ->
        classify_path env path

    (* non-binding cases *)
    | Texp_sequence (_, e)
    | Texp_struct_item (_, e) ->
        (* the size is the size of the final expression *)
        classify_expression env e

    | Texp_construct (_, {cstr_tag = Cstr_unboxed}, [e]) ->
        (* an unboxed constructor has the size of its argument *)
        classify_expression env e
    | Texp_construct _ ->
        Static

    | Texp_record { representation = Record_unboxed _;
                    fields = [| _, Overridden (_,e) |] } ->
        (* an unboxed record has the size of its single field *)
        classify_expression env e
    | Texp_record _ ->
        Static

    | Texp_variant _
    | Texp_tuple _
    | Texp_atomic_loc _
    | Texp_extension_constructor _
    | Texp_constant _ ->
        Static

    | Texp_for _
    | Texp_setfield _
    | Texp_while _
    | Texp_setinstvar _ ->
        (* Unit-returning expressions *)
        Static

    | Texp_unreachable ->
        Static

    (* The order of the three application cases matters: the guarded
       cases must be tried before the catch-all [Dynamic] one. *)
    | Texp_apply ({exp_desc = Texp_ident (_, _, vd)}, _)
      when is_ref vd ->
        Static
    | Texp_apply (_,args)
      when List.exists is_abstracted_arg args ->
        Static
    | Texp_apply _ ->
        Dynamic

    | Texp_array _ ->
        Static
    | Texp_pack mexp ->
        classify_module_expression env mexp
    | Texp_function _ ->
        Static
    | Texp_lazy e ->
      (* The code below was copied (in part) from translcore.ml *)
      begin match Typeopt.classify_lazy_argument e with
      | `Constant_or_function ->
        (* A constant expr (of type <> float if [Config.flat_float_array] is
           true) gets compiled as itself. *)
          classify_expression env e
      | `Float_that_cannot_be_shortcut
      | `Identifier `Forward_value ->
          (* Forward blocks *)
          Static
      | `Identifier `Other ->
          classify_expression env e
      | `Other ->
          (* other cases compile to a lazy block holding a function *)
          Static
      end

    | Texp_new _
    | Texp_instvar _
    | Texp_object _
    | Texp_match _
    | Texp_ifthenelse _
    | Texp_send _
    | Texp_field _
    | Texp_assert _
    | Texp_try _
    | Texp_override _
    | Texp_letop _ ->
        Dynamic
  and classify_value_bindings rec_flag env bindings =
    (* We use a non-recursive classification, classifying each
        binding with respect to the old environment
        (before all definitions), even if the bindings are recursive.

        Note: computing a fixpoint in some way would be more
        precise, as the following could be allowed:

          let rec topdef =
            let rec x = y and y = fun () -> topdef ()
            in x
    *)
    ignore rec_flag;
    let old_env = env in
    let add_value_binding env vb =
      match vb.vb_pat.pat_desc with
      | Tpat_var (id, _loc, _uid) ->
          let size = classify_expression old_env vb.vb_expr in
          Ident.add id size env
      | _ ->
          (* Note: we don't try to compute any size for complex patterns *)
          env
    in
    List.fold_left add_value_binding env bindings
  and classify_path env : _ -> Value_rec_types.recursive_binding_kind = function
    | Path.Pident x ->
        begin
          try Ident.find_same x env
          with Not_found ->
            (* an identifier will be missing from the map if either:
                - it is a non-local identifier
                  (bound outside the letrec-binding we are analyzing)
                - or it is bound by a complex (let p = e in ...) local binding
                - or it is bound within a module (let module M = ... in ...)
                  that we are not traversing for size computation

                For non-local identifiers it might be reasonable (although
                not completely clear) to consider them Static (they have
                already been evaluated), but for the others we must
                under-approximate with Dynamic.

                This could be fixed by a more complete implementation.
            *)
            Dynamic
        end
    | Path.Pdot _ | Path.Papply _ | Path.Pextra_ty _ ->
        (* local modules could have such paths to local definitions;
            classify_expression could be extended to compute module
            shapes more precisely *)
        Dynamic
  and classify_module_expression env mexp : sd =
    match mexp.mod_desc with
    | Tmod_ident (path, _) ->
        classify_path env path
    | Tmod_structure _ ->
        Static
    | Tmod_functor _ ->
        Static
    | Tmod_apply _ ->
        Dynamic
    | Tmod_apply_unit _ ->
        Dynamic
    | Tmod_constraint (mexp, _, _, coe) ->
        begin match coe with
        | Tcoerce_none ->
            (* no coercion: same classification as the constrained module *)
            classify_module_expression env mexp
        | Tcoerce_structure _ ->
            Static
        | Tcoerce_functor _ ->
            Static
        | Tcoerce_primitive _ ->
            Misc.fatal_error "letrec: primitive coercion on a module"
        | Tcoerce_alias _ ->
            Misc.fatal_error "letrec: alias coercion on a module"
        end
    | Tmod_unpack (e, _) ->
        classify_expression env e
  in classify_expression Ident.empty


(** {1 Usage of recursive variables} *)

module Mode = struct
  (** The "usage mode" of an expression is static information about how
      the surrounding context will use the value that the expression
      produces. *)
  type t =
    | Ignore
    (** The subexpression is never used during the evaluation of the
        whole program. A variable has this mode in any expression in
        which it does not occur. *)

    | Delay
    (** The context can be evaluated completely without evaluating its
        argument; the argument is only needed later during execution.
        [fun x -> ?] and [lazy ?] are [Delay] contexts. *)

    | Guard
    (** The context stores the value inside a data structure, such as
        a variant constructor or a record, and returns that structure.
        Such a value may be defined mutually-recursively with its
        context, as in [let rec li = 1 :: li]; a cyclic definition going
        through such subexpressions is productive/guarded.

        [Guard] also covers values that are not dereferenced and are
        returned by a sub-expression whose result is then discarded
        rather than returned: the subterm [?] in [let _ = ? in e] and in
        [?; e]. A cyclic definition going through such subexpressions
        cannot create a self-loop. *)

    | Return
    (** The context returns the value as is, without inspecting it.
        Such a value cannot be defined mutually-recursively with its
        context because of the risk of a self-loop: in
        [let rec x = y and y = x], each definition uses a single
        variable in [Return] context. *)

    | Dereference
    (** The context consumes, inspects and uses the value arbitrarily.
        The value must be fully defined when it is used; it cannot be
        defined mutually-recursively with its context. *)

  let equal (m : t) (m' : t) : bool = m = m'

  (* Position of a mode in the order

       Ignore < Delay < Guard < Return < Dereference

     A lower rank demands less of the qualified variable/expression,
     and therefore accepts more recursive definitions. *)
  let rank (m : t) : int =
    match m with
    | Ignore -> 0
    | Delay -> 1
    | Guard -> 2
    | Return -> 3
    | Dereference -> 4

  (* [join m m'], written (m + m') in judgments, is whichever of the two
     modes has the highest rank, that is the more conservative one. *)
  let join m m' =
    if rank m' > rank m then m' else m

  (* [compose m' m], written m'[m]: when x is used at mode m in e[x] and
     e[x] is used at mode m' in e'[e[x]], x is used at mode m'[m] in
     e'[e[x]].

     [Ignore] annihilates on both sides, [Return] is neutral on both
     sides (m[Return] = m = Return[m]), and composition is associative. *)
  let compose m' m =
    match m, m' with
    (* anything composed with Ignore is Ignore *)
    | Ignore, _ | _, Ignore -> Ignore
    (* an outer Dereference or Delay decides the result on its own *)
    | (Delay | Guard | Return | Dereference), (Dereference | Delay) -> m'
    (* an inner Return is neutral: the outer mode is kept *)
    | Return, (Guard | Return) -> m'
    (* under an outer Guard or Return, the inner mode is kept *)
    | (Dereference | Guard | Delay), (Guard | Return) -> m
end

(* Re-export of [Mode.t] together with its constructors, so that [Ignore],
   [Delay], [Guard], [Return] and [Dereference] can be written without
   the [Mode.] qualification in the rest of the file. *)
type mode = Mode.t = Ignore | Delay | Guard | Return | Dereference

module Env :
sig
  type t

  val single : Ident.t -> Mode.t -> t
  (** [single id m] is the environment where [id] is used at mode [m]
      and nothing else is bound. *)

  val empty : t
  (** The environment in which no identifier is used. *)

  val find : Ident.t -> t -> Mode.t
  (** [find id env] is the mode of [id] in [env]; identifiers without
      a binding are reported as [Ignore]. *)

  val unguarded : t -> Ident.t list -> Ident.t list
  (** [unguarded env ids] keeps the elements of [ids] whose mode in [env]
      ranks strictly above [Guard], that is those that are returned or
      dereferenced. *)

  val dependent : t -> Ident.t list -> Ident.t list
  (** [dependent env ids] keeps the elements of [ids] whose mode in [env]
      ranks strictly above [Ignore], that is those that are used. *)

  val join : t -> t -> t
  val join_list : t list -> t
  (** Pointwise join of environments, variable per variable. *)

  val compose : Mode.t -> t -> t
  (** [compose m env], written m[G], applies the mode composition m[m']
      to every mode m' bound in [env]. *)

  val remove : Ident.t -> t -> t
  (** [remove id env] is [env] without the binding of [id]. *)

  val take: Ident.t -> t -> Mode.t * t
  (** [take id env] is the mode of [id] in [env], paired with [env]
      without the binding of [id]. *)

  val remove_list : Ident.t list -> t -> t
  (** [remove_list ids env] is [env] without the bindings of any
      element of [ids]. *)

  val equal : t -> t -> bool
end = struct
  module M = Map.Make(Ident)

  (** Maps each rec-bound variable to its access mode; a missing
      binding stands for [Ignore]. *)
  type t = Mode.t M.t

  let empty = M.empty

  let single id mode = M.singleton id mode

  let equal = M.equal Mode.equal

  let find (id : Ident.t) (env : t) =
    match M.find_opt id env with
    | Some mode -> mode
    | None -> Ignore

  (* Identifiers bound on one side only keep their mode; identifiers
     bound on both sides get the join of their two modes. *)
  let join (x : t) (y : t) : t =
    M.union (fun _id v v' -> Some (Mode.join v v')) x y

  let join_list envs =
    List.fold_left (fun acc env -> join acc env) empty envs

  let compose m env =
    M.map (fun inner -> Mode.compose m inner) env

  (* Elements of [ids] whose mode in [env] ranks strictly above
     [threshold]. *)
  let ranked_above threshold env ids =
    let floor = Mode.rank threshold in
    List.filter (fun id -> Mode.rank (find id env) > floor) ids

  let unguarded env ids = ranked_above Guard env ids

  let dependent env ids = ranked_above Ignore env ids

  let remove = M.remove

  let take id env =
    let mode = find id env in
    let rest = remove id env in
    (mode, rest)

  let remove_list ids env =
    List.fold_left (fun acc id -> M.remove id acc) env ids
end

(* [remove_pat pat env] drops from [env] every identifier bound by the
   pattern [pat]. *)
let remove_pat pat env =
  let bound = pat_bound_idents pat in
  Env.remove_list bound env

(* [remove_patlist pats env] drops from [env] every identifier bound by
   one of the patterns in [pats]. *)
let remove_patlist pats env =
  List.fold_right
    (fun pat acc -> Env.remove_list (pat_bound_idents pat) acc)
    pats env

(* Usage mode judgments.

   There are two main groups of judgment functions:

   - Judgments of the form "G |- ... : m"
     compute the environment G of a subterm ... from its mode m, so
     the corresponding function has type [... -> Mode.t -> Env.t].

     We write [... -> term_judg] in this case.

   - Judgments of the form "G |- ... : m -| G'"

     correspond to binding constructs (for example "let x = e" in the
     term "let x = e in body") that have both an exterior environment
     G (the environment of the whole term "let x = e in body") and an
     interior environment G' (the environment at the "in", after the
     binding construct has introduced new names in scope).

     For example, let-binding could be given the following rule:

       G |- e : m + m'
       -----------------------------------
       G+G' |- (let x = e) : m -| x:m', G'

     Checking the whole term composes this judgment
     with the "G |- e : m" form for the let body:

       G  |- (let x = e) : m -| G'
       G' |- body : m
       -------------------------------
       G |- let x = e in body : m

     To this judgment "G |- e : m -| G'" our implementation gives the
     type [... -> Mode.t -> Env.t -> Env.t]: it takes the mode and
     interior environment as inputs, and returns the exterior
     environment.

     We write [... -> bind_judg] in this case.
*)
(* A term judgment: from the mode at which a term is used, the
   environment of its variables. *)
type term_judg = Mode.t -> Env.t
(* A binding judgment: from the mode and the interior environment (after
   the binder), the exterior environment (of the whole term). *)
type bind_judg = Mode.t -> Env.t -> Env.t

(* Lifts a judgment to optional terms: an absent term uses no variable,
   a present one is judged at the ambient mode. *)
let option : 'a. ('a -> term_judg) -> 'a option -> term_judg =
  fun judg opt mode ->
    Option.fold ~none:Env.empty ~some:(fun v -> judg v mode) opt
(* Lifts a judgment to lists of terms: every element is judged at the
   ambient mode, from left to right, and the environments are joined. *)
let list : 'a. ('a -> term_judg) -> 'a list -> term_judg =
  fun judg items mode ->
    let rec go acc = function
      | [] -> acc
      | item :: rest -> go (Env.join acc (judg item mode)) rest
    in
    go Env.empty items
(* Lifts a judgment to arrays of terms: every element is judged at the
   ambient mode, in index order, and the environments are joined. *)
let array : 'a. ('a -> term_judg) -> 'a array -> term_judg =
  fun judg items mode ->
    let acc = ref Env.empty in
    Array.iter (fun item -> acc := Env.join !acc (judg item mode)) items;
    !acc

(* The judgment of a lone identifier: it is used at the ambient mode. *)
let single : Ident.t -> term_judg =
  fun id mode -> Env.single id mode
(* [remove_ids ids judg] behaves as [judg], with the identifiers [ids]
   removed from the resulting environment. *)
let remove_ids : Ident.t list -> term_judg -> term_judg =
  fun ids judg mode ->
    judg mode |> Env.remove_list ids

(* Runs every judgment of the list at the same ambient mode and joins
   the resulting environments. *)
let join : term_judg list -> term_judg =
  fun judgs mode ->
    judgs
    |> List.map (fun judg -> judg mode)
    |> Env.join_list

(* The judgment of a term that uses no variable, whatever the mode. *)
let empty _mode = Env.empty

(* Mode composition on judgments. A judgment maps the mode given by the
   context to an environment; [judg << m] is the judgment that, given
   a mode [m'] by the context, runs [judg] at the composed mode [m'[m]],
   that is [Mode.compose m' m]. *)
let (<<) : term_judg -> Mode.t -> term_judg =
  fun judg inner outer ->
    let composed = Mode.compose outer inner in
    judg composed

(* Sequencing of a binder with the term under it. A binding judgment
   maps a mode and an inner environment to an outer environment;
   [binder >> judg] first runs [judg] at the ambient mode to obtain the
   inner environment, then hands it to [binder] at that same mode. *)
let (>>) : bind_judg -> term_judg -> term_judg =
  fun binder body mode ->
    let inner_env = body mode in
    binder mode inner_env

(* Expression judgment:
     G |- e : m
   where (m) is an input of the code and (G) is an output;
   in the Prolog mode notation, this is (+G |- -e : -m).

   [expression e] is the judgment of [e]: applied to the mode [m] at
   which [e] is used, it returns the environment [G] of the variables
   of [e]. Each case below implements one inference rule; [judg << m']
   judges a subterm at the composed mode m[m'] and [join] sums the
   environments of the premises.
*)
let rec expression : Typedtree.expression -> term_judg =
  fun exp -> match exp.exp_desc with
    | Texp_ident (pth, _, _) ->
      path pth
    | Texp_let (rec_flag, bindings, body) ->
      (*
         G  |- <bindings> : m -| G'
         G' |- body : m
         -------------------------------
         G |- let <bindings> in body : m
      *)
      value_bindings rec_flag bindings >> expression body
    | Texp_match (e, cases, eff_cases, _) ->
      (* TODO: update comment below for eff_cases
         (Gi; mi |- pi -> ei : m)^i
         G |- e : sum(mi)^i
         ----------------------------------------------
         G + sum(Gi)^i |- match e with (pi -> ei)^i : m

         In the code below the scrutinee [e] is judged twice: once at
         the join of the pattern modes of [cases], once at the join of
         the pattern modes of [eff_cases]. All the resulting
         environments are then joined together.
       *)
      (fun mode ->
        let pat_envs, pat_modes =
          List.split (List.map (fun c -> case c mode) cases) in
        let env_e = expression e (List.fold_left Mode.join Ignore pat_modes) in
        let eff_envs, eff_modes =
          List.split (List.map (fun c -> case c mode) eff_cases) in
        let eff_e = expression e (List.fold_left Mode.join Ignore eff_modes) in
        Env.join_list
          ((Env.join_list (env_e :: pat_envs)) :: (eff_e :: eff_envs)))
    | Texp_for (_, _, low, high, _, body) ->
      (*
        G1 |- low: m[Dereference]
        G2 |- high: m[Dereference]
        G3 |- body: m[Guard]
        ---
        G1 + G2 + G3 |- for _ = low to high do body done: m
      *)
      join [
        expression low << Dereference;
        expression high << Dereference;
        expression body << Guard;
      ]
    | Texp_constant _ ->
      empty
    | Texp_new (pth, _, _) ->
      (*
        G |- c: m[Dereference]
        -----------------------
        G |- new c: m
      *)
      path pth << Dereference
    | Texp_instvar (self_path, pth, _inst_var) ->
        (*
          G1 |- self_path: m[Dereference]
          G2 |- pth: m
          ---
          G1 + G2 |- (instance variable at pth): m
        *)
        join [path self_path << Dereference; path pth]
    | Texp_apply ({exp_desc = Texp_ident (_, _, vd)}, [_, Arg arg])
      when is_ref vd ->
      (*
        G |- e: m[Guard]
        ------------------
        G |- ref e: m
      *)
      expression arg << Guard
    | Texp_apply (e, args)  ->
        (* [args] may contain omitted arguments, corresponding to labels in
           the function's type that were not passed in the actual application.
           The arguments before the first omitted argument are passed to the
           function immediately, so they are dereferenced. The arguments after
           the first omitted one are stored in a closure, so guarded.
           The function itself is called immediately (dereferenced) if there
           is at least one argument before the first omitted one.
           On the other hand, if the first argument is omitted then the
           function is stored in the closure without being called. *)
        (* [split_args] returns the passed arguments found before the
           first omitted one (applied) and those found after it
           (delayed), each in their original order. *)
        let rec split_args ~has_omitted_arg = function
          | [] -> [], []
          | (_, Omitted ()) :: rest -> split_args ~has_omitted_arg:true rest
          | (_, Arg arg) :: rest ->
            let applied, delayed = split_args ~has_omitted_arg rest in
            if has_omitted_arg
            then applied, arg :: delayed
            else arg :: applied, delayed
        in
        let applied, delayed = split_args ~has_omitted_arg:false args in
        let function_mode =
          match applied with
          | [] -> Guard
          | _ :: _ -> Dereference
        in
        join [expression e << function_mode;
              list expression applied << Dereference;
              list expression delayed << Guard]
    | Texp_tuple exprs ->
      (* each element of [exprs] is a pair; only its second component,
         the expression, is judged, at mode m[Guard] *)
      list expression (List.map snd exprs) << Guard
    | Texp_atomic_loc (expr, _, _) ->
      (*
        G |- e: m[Guard]
        ------------------------
        G |- (atomic loc of e): m
      *)
      expression expr << Guard
    | Texp_array (_, exprs) ->
      let array_mode = match Typeopt.array_kind exp with
        | Lambda.Pfloatarray ->
            (* (flat) float arrays unbox their elements *)
            Dereference
        | Lambda.Pgenarray ->
            (* This is counted as a use, because constructing a generic array
               involves inspecting to decide whether to unbox (PR#6939). *)
            Dereference
        | Lambda.Paddrarray | Lambda.Pintarray ->
            (* non-generic, non-float arrays act as constructors *)
            Guard
      in
      list expression exprs << array_mode
    | Texp_construct (_, desc, exprs) ->
      (* An extension constructor dereferences the path of the
         constructor; other constructors access nothing. *)
      let access_constructor =
        match desc.cstr_tag with
        | Cstr_extension (pth, _) ->
          path pth << Dereference
        | _ -> empty
      in
      (* The arguments are returned as is by an unboxed constructor,
         and guarded by any other constructor. *)
      let m' = match desc.cstr_tag with
        | Cstr_unboxed ->
          Return
        | Cstr_constant _ | Cstr_block _ | Cstr_extension _ ->
          Guard
      in
      join [
        access_constructor;
        list expression exprs << m'
      ]
    | Texp_variant (_, eo) ->
      (*
        G |- e: m[Guard]
        ------------------   -----------
        G |- `A e: m         [] |- `A: m
      *)
      option expression eo << Guard
    | Texp_record { fields = es; extended_expression = eo;
                    representation = rep } ->
        (* The mode of the fields depends on the representation:
           [Record_float] dereferences them, [Record_unboxed] returns
           them, the other representations guard them. *)
        let field_mode = match rep with
          | Record_float -> Dereference
          | Record_unboxed _ -> Return
          | Record_regular | Record_inlined _
          | Record_extension _ -> Guard
        in
        (* Only overridden fields carry an expression to judge. *)
        let field (_label, field_def) = match field_def with
            Kept _ -> empty
          | Overridden (_, e) -> expression e
        in
        (* The extended expression, when present, is dereferenced. *)
        join [
          array field es << field_mode;
          option expression eo << Dereference
        ]
    | Texp_ifthenelse (cond, ifso, ifnot) ->
      (*
        Gc |- c: m[Dereference]
        G1 |- e1: m
        G2 |- e2: m
        ---
        Gc + G1 + G2 |- if c then e1 else e2: m

      Note: `if c then e1 else e2` is treated in the same way as
      `match c with true -> e1 | false -> e2`
      *)
      join [
        expression cond << Dereference;
        expression ifso;
        option expression ifnot;
      ]
    | Texp_setfield (e1, _, _, e2) ->
      (*
        G1 |- e1: m[Dereference]
        G2 |- e2: m[Dereference]
        ---
        G1 + G2 |- e1.x <- e2: m

        Note: e2 is dereferenced in the case of a field assignment to
        a record of unboxed floats in that case, e2 evaluates to
        a boxed float and it is unboxed on assignment.
      *)
      join [
        expression e1 << Dereference;
        expression e2 << Dereference;
      ]
    | Texp_sequence (e1, e2) ->
      (*
        G1 |- e1: m[Guard]
        G2 |- e2: m
        --------------------
        G1 + G2 |- e1; e2: m

        Note: `e1; e2` is treated in the same way as `let _ = e1 in e2`
      *)
      join [
        expression e1 << Guard;
        expression e2;
      ]
    | Texp_while (cond, body) ->
      (*
        G1 |- cond: m[Dereference]
        G2 |- body: m[Guard]
        ---------------------------------
        G1 + G2 |- while cond do body done: m
      *)
      join [
        expression cond << Dereference;
        expression body << Guard;
      ]
    | Texp_send (e1, _) ->
      (*
        G |- e: m[Dereference]
        ----------------------
        G |- e#x: m
      *)
      join [
        expression e1 << Dereference
      ]
    | Texp_field (e, _, _) ->
      (*
        G |- e: m[Dereference]
        -----------------------
        G |- e.x: m
      *)
      expression e << Dereference
    | Texp_setinstvar (pth,_,_,e) ->
      (*
        G |- e: m[Dereference]
        ----------------------
        G |- x <- e: m

        The path [pth] is dereferenced as well.
      *)
      join [
        path pth << Dereference;
        expression e << Dereference;
      ]
    | Texp_assert (e, _) ->
      (*
        G |- e: m[Dereference]
        -----------------------
        G |- assert e: m

        Note: `assert e` is treated just as if `assert` was a function.
      *)
      expression e << Dereference
    | Texp_pack mexp ->
      (*
        G |- M: m
        ----------------
        G |- module M: m
      *)
      modexp mexp
    | Texp_object (clsstrct, _) ->
      class_structure clsstrct
    | Texp_try (e, cases, eff_cases) ->
      (*
        G |- e: m      (Gi; _ |- pi -> ei : m)^i
        --------------------------------------------
        G + sum(Gi)^i |- try e with (pi -> ei)^i : m

        Contrarily to match, the patterns p do not inspect
        the value of e, so their mode does not influence the
        mode of e.

        The effect cases [eff_cases] are handled like [cases].
      *)
      let case_env c m = fst (case c m) in
      join [
        expression e;
        list case_env cases;
        list case_env eff_cases;
      ]
    | Texp_override (pth, fields) ->
      (*
         G |- pth : m   (Gi |- ei : m[Dereference])^i
         ----------------------------------------------------
         G + sum(Gi)^i |- {< (xi = ei)^i >} (at path pth) : m

         Note: {< .. >} is desugared to a function application, but
         the function implementation might still use its arguments in
         a guarded way only -- intuitively it should behave as a constructor.
         We could possibly refine the arguments' Dereference into Guard here.
      *)
      let field (_, _, arg) = expression arg in
      join [
        path pth << Dereference;
        list field fields << Dereference;
      ]
    | Texp_function (params, body) ->
      (*
         G      |-{body} b  : m[Delay]
         (Hj    |-{def}  Pj : m[Delay])^j
         H  := sum(Hj)^j
         ps := sum(pat(Pj))^j
         -----------------------------------
         G + H - ps |- fun (Pj)^j -> b : m
      *)
      let param_pat param =
        (* param P ::=
            | ?(pat = expr)
            | pat

          Define pat(P) as
              pat if P = ?(pat = expr)
              pat if P = pat
          *)
        match param.fp_kind with
        | Tparam_pat pat -> pat
        | Tparam_optional_default (pat, _) -> pat
      in
      (* Optional argument defaults.

          G |-{def} P : m
      *)
      let param_default param =
        match param.fp_kind with
        | Tparam_optional_default (_, default) ->
          (*
              G |- e : m
              ------------------
              G |-{def} ?(p=e) : m
          *)
            expression default
        | Tparam_pat _ ->
          (*
              ------------------
              . |-{def} p : m
          *)
            empty
      in
      let patterns = List.map param_pat params in
      let defaults = List.map param_default params in
      let body = function_body body in
      let f = join (body :: defaults) << Delay in
      (* the variables bound by the parameter patterns are local to the
         function: they are removed from the resulting environment *)
      (fun m ->
         let env = f m in
         remove_patlist patterns env)
    | Texp_lazy e ->
      (*
        G |- e: m[Delay]
        ----------------  (modulo some subtle compiler optimizations)
        G |- lazy e: m
      *)
      let lazy_mode = match Typeopt.classify_lazy_argument e with
        | `Constant_or_function
        | `Identifier _
        | `Float_that_cannot_be_shortcut ->
          Return
        | `Other ->
          Delay
      in
      expression e << lazy_mode
    | Texp_letop{let_; ands; body; _} ->
        (* The binding operators [let_ :: ands] are judged at
           m[Dereference]; the body case is judged at m[Delay] and its
           pattern mode is dropped. *)
        let case_env c m = fst (case c m) in
        join [
          list binding_op (let_ :: ands) << Dereference;
          case_env body << Delay
        ]
    | Texp_unreachable ->
      (*
        ----------
        [] |- .: m
      *)
      empty
    | Texp_extension_constructor (_lid, pth) ->
      (* the path of the extension constructor is dereferenced *)
      path pth << Dereference
    | Texp_struct_item (si, e) ->
      (* the structure item [si] is a binder whose interior environment
         is the environment of [e] *)
      structure_item si >> expression e

(* Function bodies:

     G |-{body} b : m

   A body that is a plain expression ([Tfunction_body]) is judged as
   that expression:

     G |- e : m
     ------------------
     G |-{body} e : m

   A body made of cases ([Tfunction_cases]) joins the environments of
   all its cases:

     (Gi; _ |- pi -> ei : m)^i
     --------------------------------------------
     sum(Gi)^i |-{body} function (pi -> ei)^i : m

   Contrarily to match, the values that are pattern-matched are bound
   locally, so the pattern modes returned by [case] are dropped and do
   not influence the final environment.
*)
and function_body = function
  | Tfunction_body e ->
      expression e
  | Tfunction_cases { cases; _ } ->
      let case_env c mode = fst (case c mode) in
      join (List.map case_env cases)

(* A binding operator uses both the path of the operator and the
   expression it binds, at the ambient mode. *)
and binding_op : Typedtree.binding_op -> term_judg =
  fun { bop_op_path; bop_exp; _ } ->
    join [
      path bop_op_path;
      expression bop_exp;
    ]

and class_structure : Typedtree.class_structure -> term_judg =
  (* The judgment of a class structure is built from the judgments of
     its fields, combined with [list]. *)
  fun { cstr_fields; _ } -> list class_field cstr_fields

and class_field : Typedtree.class_field -> term_judg =
  fun { cf_desc; _ } -> match cf_desc with
    | Tcf_inherit (_, parent, _, _, _) ->
      (* The inherited class expression is analysed in Dereference mode. *)
      class_expr parent << Dereference
    | Tcf_val (_, _, _, kind, _)
    | Tcf_method (_, _, kind) ->
      (* Values and methods both delegate to their field kind. *)
      class_field_kind kind
    | Tcf_initializer init ->
      expression init << Dereference
    | Tcf_constraint _
    | Tcf_attribute _ ->
      (* Constraints and attributes contribute no variable use. *)
      empty

and class_field_kind : Typedtree.class_field_kind -> term_judg =
  (* A virtual field has no body and yields the empty judgment; a
     concrete field has its body analysed in Dereference mode. *)
  function
  | Tcfk_virtual _ -> empty
  | Tcfk_concrete (_, body) -> expression body << Dereference

and modexp : Typedtree.module_expr -> term_judg =
  fun { mod_desc; _ } -> match mod_desc with
    | Tmod_ident (pth, _) ->
      path pth
    | Tmod_structure str ->
      structure str
    | Tmod_functor (_, body) ->
      (* The functor body is analysed under Delay. *)
      modexp body << Delay
    | Tmod_apply (funct, arg, _) ->
      (* Both the functor and its argument are analysed under
         Dereference. *)
      join [
        modexp funct << Dereference;
        modexp arg << Dereference;
      ]
    | Tmod_apply_unit funct ->
      modexp funct << Dereference
    | Tmod_constraint (constrained, _, _, coe) ->
      (* [apply_coercion coe use] selects the mode induced by the
         coercion [coe] and hands it to the continuation [use], which
         builds the judgment of whatever module is being coerced. *)
      let rec apply_coercion coe use =
        match coe with
        | Tcoerce_none ->
          use Return
        | Tcoerce_structure _
        | Tcoerce_functor _ ->
          (* These coercions perform a shallow copy of the input module,
             by creating a new module with fields obtained by accessing
             the same fields in the input module. *)
          use Dereference
        | Tcoerce_primitive _ ->
          (* This corresponds to [external] declarations,
             and the coercion ignores its argument. *)
          use Ignore
        | Tcoerce_alias (_, alias, inner) ->
          (* Alias coercions ignore their arguments, but they evaluate
             their alias module [alias] under another coercion: the
             incoming continuation is dropped and replaced by one that
             targets the alias path. *)
          apply_coercion inner (fun m -> path alias << m)
      in
      apply_coercion coe (fun m -> modexp constrained << m)
    | Tmod_unpack (e, _) ->
      expression e


(* G |- pth : m *)
and path : Path.t -> term_judg =
  (*
    ------------
    x: m |- x: m

    G |- A: m[Dereference]
    -----------------------
    G |- A.x: m

    G1 |- A: m[Dereference]
    G2 |- B: m[Dereference]
    ------------------------ (as for term application)
    G1 + G2 |- A(B): m
  *)
  function
  | Path.Pident x ->
      single x
  | Path.Pdot (prefix, _) ->
      path prefix << Dereference
  | Path.Papply (funct, arg) ->
      (* Both sides of a path application are used under Dereference. *)
      let deref p = path p << Dereference in
      join [deref funct; deref arg]
  | Path.Pextra_ty (inner, _extra) ->
      (* The extra type component is skipped; only the inner path is
         analysed. *)
      path inner

(* G |- struct ... end : m *)
and structure : Typedtree.structure -> term_judg =
  (*
    G1, {x: _, x in vars(G1)} |- item1: G2 + ... + Gn in m
    G2, {x: _, x in vars(G2)} |- item2: G3 + ... + Gn in m
    ...
    Gn, {x: _, x in vars(Gn)} |- itemn: [] in m
    ---
    (G1 + ... + Gn) - V |- struct item1 ... itemn end: m
  *)
  fun { str_items; _ } m ->
    (* Items are folded from the last to the first, starting from the
       empty environment, so each item receives the environment
       computed for the items that follow it. *)
    let add_item item following = structure_item item m following in
    List.fold_right add_item str_items Env.empty

(* G |- <structure item> : m -| G'
   where G is an output and m, G' are inputs *)
and structure_item : Typedtree.structure_item -> bind_judg =
  fun { str_desc; _ } m env -> match str_desc with
    | Tstr_eval (e, _) ->
      (*
        Ge |- e: m[Guard]
        G |- items: m -| G'
        ---------------------------------
        Ge + G |- (e;; items): m -| G'

        The expression `e` is treated in the same way as let _ = e
      *)
      Env.join ((expression e << Guard) m) env
    | Tstr_value (rec_flag, bindings) ->
      value_bindings rec_flag bindings m env
    | Tstr_module { mb_id; mb_expr; _ } ->
      module_binding (mb_id, mb_expr) m env
    | Tstr_recmodule mbs ->
      let as_pair { mb_id; mb_expr; _ } = (mb_id, mb_expr) in
      recursive_module_bindings (List.map as_pair mbs) m env
    | Tstr_typext { tyext_constructors; _ } ->
      (* The declared extension constructors are removed from the
         incoming environment before joining with their own uses. *)
      let ext_ids =
        List.map (fun { ext_id; _ } -> ext_id) tyext_constructors in
      env
      |> Env.remove_list ext_ids
      |> Env.join (list extension_constructor tyext_constructors m)
    | Tstr_exception { tyexn_constructor; _ } ->
      env
      |> Env.remove tyexn_constructor.ext_id
      |> Env.join (extension_constructor tyexn_constructor m)
    | Tstr_open od ->
      open_declaration od m env
    | Tstr_class classes ->
        (* The identifiers of the declared classes are removed both
           from the environment of each class expression and from the
           incoming environment. *)
        let class_ids =
          List.map (fun (infos, _) -> infos.ci_id_class) classes in
        let without_classes = Env.remove_list class_ids in
        let class_declaration ({ ci_expr; _ }, _) m =
          without_classes (class_expr ci_expr m) in
        Env.join
          (list class_declaration classes m)
          (without_classes env)
    | Tstr_include { incl_mod; incl_type; _ } ->
      (* Identifiers listed in the included signature are removed from
         the incoming environment. *)
      let included_ids = List.map Types.signature_item_id incl_type in
      env
      |> Env.remove_list included_ids
      |> Env.join (modexp incl_mod m)
    | Tstr_primitive _
    | Tstr_type _
    | Tstr_modtype _
    | Tstr_class_type _
    | Tstr_attribute _ ->
      (*
        -------------------
        G |- type t: m -| G

        These items leave the environment unchanged.
      *)
      env

(* G |- module M = E : m -| G *)
and module_binding : (Ident.t option * Typedtree.module_expr) -> bind_judg =
  fun (id, mexp) m env ->
      (*
        GE |- E: m[mM + Guard]
        -------------------------------------
        GE + G |- module M = E : m -| M:mM, G
      *)
      (* First determine the mode under which the bound expression is
         used, together with the environment left once the bound name
         (if any) has been taken out of it. *)
      let use_mode, rest =
        match id with
        | None -> Guard, env
        | Some id ->
          let mM, rest = Env.take id env in
          Mode.join mM Guard, rest
      in
      Env.join ((modexp mexp << use_mode) m) rest

and open_declaration : Typedtree.open_declaration -> bind_judg =
  fun { open_expr; open_bound_items; _ } m env ->
      (* The identifiers listed in the bound signature are removed from
         the incoming environment, which is then joined with the uses
         of the opened module expression. *)
      let bound_ids = List.map Types.signature_item_id open_bound_items in
      env
      |> Env.remove_list bound_ids
      |> Env.join (modexp open_expr m)

and recursive_module_bindings
  : (Ident.t option * Typedtree.module_expr) list -> bind_judg =
  fun m_bindings m env ->
    (* Identifiers of the named modules of the recursive group. *)
    let bound = List.filter_map fst m_bindings in
    let without_bound = Env.remove_list bound in
    let binding (name, body) m =
      (* A named module is used under its mode in the incoming
         environment joined with Guard; an anonymous one under Guard. *)
      let use_mode =
        match name with
        | None -> Guard
        | Some id -> Mode.join (Env.find id env) Guard
      in
      without_bound ((modexp body << use_mode) m)
    in
    Env.join (list binding m_bindings m) (without_bound env)

and class_expr : Typedtree.class_expr -> term_judg =
  fun { cl_desc; _ } -> match cl_desc with
    | Tcl_ident (pth, _, _) ->
        path pth << Dereference
    | Tcl_structure cs ->
        class_structure cs
    | Tcl_fun (_, _, args, body, _) ->
        (* The body is delayed, and the identifiers carried by [args]
           are removed from the result. *)
        remove_ids (List.map fst args) (class_expr body << Delay)
    | Tcl_apply (funct, args) ->
        (* An omitted argument contributes nothing. *)
        let arg_judg = function
          | (_, Omitted ()) -> empty
          | (_, Arg e) -> expression e
        in
        join [
          class_expr funct << Dereference;
          list arg_judg args << Dereference;
        ]
    | Tcl_let (rec_flag, bindings, _, body) ->
        value_bindings rec_flag bindings >> class_expr body
    | Tcl_constraint (inner, _, _, _, _)
    | Tcl_open (_, inner) ->
        (* Constraints and opens are transparent for this analysis. *)
        class_expr inner

and extension_constructor : Typedtree.extension_constructor -> term_judg =
  (* A fresh declaration uses no variable; a rebinding uses the path of
     the constructor it rebinds. *)
  fun { ext_kind; _ } -> match ext_kind with
    | Text_decl _ -> empty
    | Text_rebind (pth, _lid) -> path pth

(* G |- let (rec?) (pi = ei)^i : m -| G' *)
and value_bindings : rec_flag -> Typedtree.value_binding list -> bind_judg =
  fun rec_flag bindings mode bound_env ->
    let all_bound_pats = List.map (fun { vb_pat; _ } -> vb_pat) bindings in
    (* (D - (pi)^i): the incoming environment without the variables
       bound by this group of bindings. *)
    let outer_env = remove_patlist all_bound_pats bound_env in
    let bindings_env =
      match rec_flag with
      | Nonrecursive ->
        (*
           (Gi, pi:_ |- ei : m[mbody_i])^i   (pi : mbody_i -| D)^i
           ------------------------------------------------------------
           Sum(Gi) + (D - (pi)^i) |- let (pi=ei)^i : m -| D
        *)
          let binding_env vb m =
            let mbody = pattern vb.vb_pat bound_env in
            expression vb.vb_expr (Mode.compose m mbody)
            |> remove_pat vb.vb_pat
          in
          list binding_env bindings mode
      | Recursive ->
        (*
           (Gi, (xj : mdef_ij)^j |- ei : m[mbody_i])^i   (xi : mbody_i -| D)^i
           G'i = Gi + mdef_ij[G'j]
           -------------------------------------------------------------------
           Sum(G'i) + (D - (pi)^i) |- let rec (xi=ei)^i : m -| D

           The (mdef_ij)^i,j are a family of modes over two indices:
           mdef_ij represents the mode of use, within e_i the definition of x_i,
           of the mutually-recursive variable x_j.

           The (G'i)^i are defined from the (Gi)^i as a family of equations,
           whose smallest solution is computed as a least fixpoint.

           The (Gi)^i are the "immediate" dependencies of each (ei)^i
           on the outer context (excluding the mutually-defined
           variables).
           The (G'i)^i contain the "transitive" dependencies as well:
           if ei depends on xj, then the dependencies of G'i of xi
           must contain the dependencies of G'j, composed by
           the mode mdef_ij of use of xj in ei.

           For example, consider:

             let rec z =
               let rec x = ref y
               and y = ref z
               in f x

           this definition should be rejected as the body [f x]
           dereferences [x], which can be used to access the
           yet-uninitialized value [z]. This requires realizing that [x]
           depends on [z] through [y], which requires the transitive
           closure computation.

           An earlier version of our check would take only the (Gi)^i
           instead of the (G'i)^i, which is incorrect and would accept
           the example above.
        *)
          (* [analyse_binding] takes a binding (x_i = e_i)
             and computes (Gi, (mdef_ij)^j). *)
          let analyse_binding { vb_pat = x_i; vb_expr = e_i; _ } =
            let mbody_i = pattern x_i bound_env in
            (* Gi, (x_j:mdef_ij)^j  *)
            let rhs_env_i = expression e_i (Mode.compose mode mbody_i) in
            (* (mdef_ij)^j (for a fixed i) *)
            let mdef_i =
              List.map
                (fun { vb_pat = x_j; _ } -> pattern x_j rhs_env_i)
                bindings
            in
            (* (Gi, (mdef_ij)^j), where Gi is the right-hand side
               environment without the mutually-defined variables *)
            (remove_patlist all_bound_pats rhs_env_i, mdef_i)
          in
          let immediate, mdef =
            List.split (List.map analyse_binding bindings) in
          (* One round of propagation:
             Gi, (mdef_ij)^j => Gi + Sum_j mdef_ij[Gj] *)
          let step envs =
            let close_one env_i mdef_i =
              List.map2 Env.compose mdef_i envs
              |> Env.join_list
              |> Env.join env_i
            in
            List.map2 close_one envs mdef
          in
          (* Iterate [step] until a round leaves every environment
             unchanged. *)
          let rec fixpoint envs =
            let envs' = step envs in
            if List.for_all2 Env.equal envs envs'
            then envs'
            else fixpoint envs'
          in
          Env.join_list (fixpoint immediate)
    in Env.join bindings_env outer_env

(* G; m' |- (p -> e) : m
   with outputs G, m' and input m

   m' is the mode under which the scrutinee of p
   (the value matched against p) is placed.
*)
and case
    : 'k . 'k Typedtree.case -> mode -> Env.t * mode
  = fun { Typedtree.c_lhs; c_guard; c_rhs; _ } ->
    (*
       Ge |- e : m    Gg |- g : m[Dereference]
       G := Ge+Gg     p : mp -| G
       ----------------------------------------
       G - p; m[mp] |- (p (when g)? -> e) : m
    *)
    (* The optional guard is analysed under Dereference and joined with
       the right-hand side. *)
    let guard_judg = option expression c_guard << Dereference in
    let branch_judg = join [guard_judg; expression c_rhs] in
    fun m ->
      let env = branch_judg m in
      (* The scrutinee mode is read from [env] while it still contains
         the variables bound by the pattern; those variables are
         removed only from the returned environment. *)
      let scrutinee_mode = Mode.compose m (pattern c_lhs env) in
      (remove_pat c_lhs env, scrutinee_mode)

(* p : m -| G
   with output m and input G

   m is the mode under which the scrutinee of p is placed.
*)
and pattern : type k . k general_pattern -> Env.t -> mode = fun pat env ->
  (*
    mp := | Dereference if p is destructuring
          | Guard       otherwise
    me := sum{G(x), x in vars(p)}
    --------------------------------------------
    p : (mp + me) -| G
  *)
  (* me: join of the modes, in [env], of every identifier bound by the
     pattern, starting from Ignore. *)
  let m_env =
    List.fold_left
      (fun acc id -> Mode.join acc (Env.find id env))
      Ignore
      (pat_bound_idents pat)
  in
  (* mp: mode contributed by the shape of the pattern itself. *)
  let m_pat =
    if is_destructuring_pattern pat then Dereference else Guard
  in
  Mode.join m_pat m_env

and is_destructuring_pattern : type k . k general_pattern -> bool =
  (* Tells whether a pattern is destructuring, as used by [pattern] to
     pick between the Dereference and Guard modes. *)
  fun pat -> match pat.pat_desc with
    (* Wildcards and plain variables are not destructuring. *)
    | Tpat_any
    | Tpat_var (_, _, _) -> false
    | Tpat_exception _ -> false
    (* Constants and every constructor-like form are destructuring. *)
    | Tpat_constant _
    | Tpat_tuple _
    | Tpat_construct _
    | Tpat_variant _
    | Tpat_record (_, _)
    | Tpat_array _
    | Tpat_lazy _ -> true
    (* Wrappers defer to the pattern they contain. *)
    | Tpat_alias (inner, _, _, _, _) -> is_destructuring_pattern inner
    | Tpat_value inner -> is_destructuring_pattern (inner :> pattern)
    (* An or-pattern is destructuring as soon as one side is. *)
    | Tpat_or (left, right, _) ->
        is_destructuring_pattern left || is_destructuring_pattern right

(* [is_valid_recursive_expression idlist expr] returns [Some kind], where
   [kind] is the classification of [expr], when [expr] is an acceptable
   right-hand side for a recursive definition of the identifiers in
   [idlist], and [None] otherwise. *)
let is_valid_recursive_expression idlist expr : sd option =
  match expr.exp_desc with
  | Texp_function _ ->
     (* Fast path: functions can never have invalid recursive references *)
     Some Static
  | _ ->
     let rkind = classify_expression expr in
     (* Both classifications inspect the same environment, computed in
        Return mode. *)
     let ty = expression expr Return in
     let none_of ids = match ids with [] -> true | _ :: _ -> false in
     let no_unguarded_use = none_of (Env.unguarded ty idlist) in
     let is_valid =
       match rkind with
       | Static ->
         (* The expression has known size or is constant *)
         no_unguarded_use
       | Dynamic ->
         (* The expression has unknown size: on top of the unguarded
            check, [Env.dependent] must report nothing either. *)
         no_unguarded_use && none_of (Env.dependent ty idlist)
     in
     if is_valid then Some rkind else None

(* A class declaration may contain let-bindings. If they are recursive,
   their validity will already be checked by [is_valid_recursive_expression]
   during type-checking. This function prevents a different kind of
   invalid recursion, namely the unsafe creation of objects of this class
   in the let-bindings. For example,
   {|class a = let x = new a in object ... end|}
   is forbidden, but
   {|class a = let x () = new a in object ... end|}
   is allowed.
*)
let is_valid_class_expr idlist ce =
  (* [scan mode ce] collects the uses made by the let-bindings that wrap
     a class expression; every other class form yields the empty
     environment. *)
  let rec scan : mode -> Typedtree.class_expr -> Env.t =
    fun mode ce -> match ce.cl_desc with
      | Tcl_ident _
      | Tcl_structure _
      | Tcl_fun _
      | Tcl_apply _ ->
        (*
          ----------
          [] |- a: m

          -----------------------
          [] |- struct ... end: m

          ---------------------------
          [] |- fun x1 ... xn -> C: m
        *)
        Env.empty
      | Tcl_let (rec_flag, bindings, _, body) ->
        value_bindings rec_flag bindings mode (scan mode body)
      | Tcl_constraint (inner, _, _, _, _)
      | Tcl_open (_, inner) ->
        scan mode inner
  in
  (* The class expression is valid when no identifier of [idlist] is
     reported by [Env.unguarded]. *)
  match Env.unguarded (scan Return ce) idlist with
  | [] -> true
  | _ :: _ -> false
